County LA
1 2019 19562.55
2 2018 20516.12
3 2017 20663.27
4 2016 20287.96
5 2015 20432.53
6 2014 20742.77
7 2013 20611.29
8 2012 21076.22
9 2011 20064.62
10 2010 19721.28
11 2009 20590.38
12 2008 21115.74
13 2007 20536.16
14 2006 20377.01
15 2005 19711.01
16 2004 19507.01
17 2003 19056.04
18 2002 17917.34
19 2001 18212.58
20 2000 18891.64
21 1999 17665.12
22 1998 17234.63
23 1997 17578.66
24 1996 16322.05
25 1995 16823.63
26 1994 16065.58
27 1993 15759.08
28 1992 16377.79
29 1991 16310.88
30 1990 16960.72
31 Total Usage 566691.67
About
---
title: "Explore Housing and Electricity Relationship in LA"
output:
  flexdashboard::flex_dashboard:
    storyboard: true
    social: menu
    source: embed
---
```{r setup, include=FALSE}
library(flexdashboard)
```
```{r}
library(tidyverse)
library(ggplot2)
library(scales)
library(viridis)
library(igraph)
library(plotly)
library(sp)
library(maps)
library(maptools)
library(wordcloud2)
library(tm)
library(SnowballC)
library(wordcloud)
library(RColorBrewer)
# Mapbox access token for the session. Sys.setenv() stores every value as a
# character string, so the token is written as a string literal here.
# NOTE(review): this looks like a placeholder committed to source; consider
# supplying the real token through the environment (e.g. .Renviron) instead.
Sys.setenv(MAPBOX_TOKEN = "11122223333444")
```
```{r}
# Build the owner -> county edge list used by the network frame.
electricity <- read.csv("Electricity.csv")

# Keep only the three columns of interest and drop rows with a missing value
# in any of them.
#
# `weight` is the integer code of each row's Highest_kV factor level, not the
# voltage itself. It is computed over every remaining row *before* the county
# filter, so the codes do not depend on which counties are kept. The data is
# deliberately left ungrouped here: a per-group mutate() would renumber the
# levels inside each group.
groupbys <- electricity %>%
  select(Owner, County, Highest_kV) %>%
  drop_na() %>%
  mutate(weight = as.numeric(factor(Highest_kV))) %>%
  select(Owner, County, weight) %>%
  filter(County %in% c(
    "Los Angeles", "San Diego",
    "Orange", "Riverside", "San Bernardino",
    "Santa Clara", "Sacramento", "San Francisco",
    "Kern", "Fresno"
  ))

# One row per (Owner, County) pair with the summed weight.
# `.groups = "drop"` returns an ungrouped tibble and keeps summarize() from
# printing its regrouping message into the rendered dashboard.
graphdf <- groupbys %>%
  group_by(Owner, County) %>%
  summarize(total_weight = sum(weight), .groups = "drop") %>%
  drop_na()
```
### The network relationship between electricity station owners and the top 10 CA counties.
```{r}
# Directed owner -> county graph built from the (Owner, County, total_weight)
# edge list in `graphdf`.
graphdf1 <- graph_from_data_frame(graphdf, directed = TRUE)

# Colour vertices by role rather than by hard-coded position: owners are gray
# and each county vertex gets its own colour, in vertex order. The previous
# index-based assignment (1:10 gray, then 10..19 coloured one by one) was only
# correct for exactly nine owners followed by ten counties.
# NOTE(review): a vertex whose name appears in both the Owner and the County
# column is treated as a county here.
county_palette <- c(
  "chocolate", "chocolate1", "chocolate2", "chocolate3",
  "coral", "coral1", "coral2", "coral3",
  "burlywood1", "burlywood2"
)
is_county <- V(graphdf1)$name %in% graphdf$County
vertex_colours <- rep("gray", vcount(graphdf1))
vertex_colours[is_county] <- county_palette[seq_len(sum(is_county))]
V(graphdf1)$colour <- vertex_colours

# Edge width: standardised total weight, stretched and shifted (2 * z + 3).
# as.numeric() drops the matrix shape and attributes that scale() returns.
E(graphdf1)$weight <- as.numeric(2 * scale(graphdf$total_weight) + 3)

# Tight margins so the circular layout fills the frame.
par(mar = c(0.3, 0.3, 1, 0.3))
plot(
  graphdf1,
  layout = layout_in_circle,
  edge.arrow.size = 0.3,
  vertex.size = 20,
  edge.width = E(graphdf1)$weight,
  vertex.color = adjustcolor(V(graphdf1)$colour, alpha.f = .8)
)
```
```{r}
# Load the LA electricity consumption table plotted in the next frame.
consumption <- read.csv(file = "Consumption.csv")
```
### Electricity Consumption in LA with Time Series
```{r}
# Scatter plot of electricity consumption in LA by year.
#
# `consumption$County` is read as the year label of each row. Only rows whose
# label is a four-digit year are plotted; anything else (for example a
# summary/total row) is left out explicitly instead of being turned into an NA
# date.
#
# Each year is pinned to January 1st. Parsing with as.Date(x, format = "%Y")
# fills the missing month and day from the *current* date, so the x positions
# would change depending on the day the document is knit.
year_label <- trimws(as.character(consumption$County))
is_year <- grepl("^[0-9]{4}$", year_label)

fig <- plot_ly(
  type = "scatter",
  x = as.Date(paste0(year_label[is_year], "-01-01")),
  y = consumption$LA[is_year],
  name = "Electricity Consumption in LA",
  mode = "markers"
)
fig <- fig %>%
  layout(
    title = "Electricity Consumption in LA"
  )
fig
```
***
```{r}
# Auto-print the consumption data frame (read from Consumption.csv) as this
# chunk's output.
consumption
```
```{r}
# Load the housing table; the chunks below use its longitude/latitude,
# median_house_value, population and ocean_proximity columns.
housing <- read_csv("housing.csv")
#' Look up the county that contains each longitude/latitude point.
#'
#' @param pointsDF Data frame (or tibble) whose first two columns are used as
#'   the point coordinates: longitude, then latitude, in decimal degrees.
#' @return Character vector with one element per row of `pointsDF`, holding
#'   the ID of the matching polygon from the `maps` "county" database (for
#'   example "california,los angeles"). Points that fall in no polygon are
#'   expected to come back as `NA`.
latlong2county <- function(pointsDF) {
  lonlat_crs <- CRS("+proj=longlat +datum=WGS84")

  # Prepare a SpatialPolygons object with one entry per county.
  # `maps::map` is namespace-qualified because purrr (attached through
  # tidyverse) also exports a function called map(); the bare name only works
  # when maps happens to be attached later.
  counties <- maps::map("county", fill = TRUE, col = "transparent",
                        plot = FALSE)
  # Keep only the part of each region name before any ":" so every piece of a
  # county shares one ID.
  IDs <- vapply(strsplit(counties$names, ":"), function(x) x[1], character(1))
  counties_sp <- map2SpatialPolygons(counties, IDs = IDs,
                                     proj4string = lonlat_crs)

  # Convert the first two columns of pointsDF to a SpatialPoints object.
  pointsSP <- SpatialPoints(pointsDF[1:2], proj4string = lonlat_crs)

  # Use over() to get the _index_ of the polygon containing each point.
  indices <- over(pointsSP, counties_sp)

  # Translate polygon indices back into county IDs.
  countyNames <- vapply(counties_sp@polygons, function(x) x@ID, character(1))
  countyNames[indices]
}
# Sample points (in Wisconsin and Oregon) for checking latlong2county() by
# hand; nothing in the visible code passes them to the function.
testPoints <- data.frame(x = c(-90, -120), y = c(44, 44))

# Tag every housing row with its county. Longitude and latitude are selected
# by name so the lookup does not depend on them being the first two columns
# of housing.csv.
housing[["county"]] <- latlong2county(housing[c("longitude", "latitude")])

# Keep Los Angeles County only.
housing <- housing %>%
  filter(county %in% c("california,los angeles"))
# Bin median_house_value into five price bands; each band includes its upper
# bound (e.g. exactly 100000 falls in "<100K"). Missing values stay NA.
#
# cut() builds the whole column in a single step. Filling housing$Category
# piece by piece before the column exists makes a tibble warn about an
# unknown/uninitialised column on every assignment. as.character() keeps the
# result a plain character column rather than a factor.
housing$Category <- as.character(cut(
  housing$median_house_value,
  breaks = c(-Inf, 100000, 200000, 300000, 400000, Inf),
  labels = c("<100K", "100K~200K", "200K~300K", "300K~400K", ">400K"),
  right = TRUE,
  include.lowest = TRUE
))
```
### Los Angeles Housing Price Distribution With Population
```{r}
# NOTE(review): warn = -1 is a session-wide option, so it silences warnings
# for everything evaluated after this line, not only for this chunk.
options(warn = -1)

# Map of the LA housing rows: position is longitude/latitude, colour is the
# median house value and point size is the population.
# The extra aesthetics (hma, tr, tb, hh, mi) are not drawn by geom_point();
# presumably they were meant for interactive tooltips — TODO confirm.
plot_map <- ggplot(
  housing,
  aes(
    x = longitude, y = latitude, color = median_house_value,
    hma = housing_median_age, tr = total_rooms, tb = total_bedrooms,
    hh = households, mi = median_income
  )
) +
  geom_point(aes(size = population), alpha = 0.6) +
  labs(
    x = "Longitude",
    y = "Latitude",
    title = "Los Angeles Housing Price Distribution",
    color = "Median House Value (in $USD)",
    size = "Population"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_color_viridis(option = "D", labels = comma)

plot_map
```
### The distribution of housing prices among ocean proximity categories in LA
```{r}
# Histogram (row counts) of the LA housing data per ocean_proximity value,
# with one colour per price Category.
# The title previously read "amoung"; the spelling is corrected here because
# the string is shown to the reader of the dashboard.
fig <- plot_ly(housing, x = ~ocean_proximity, color = ~Category) %>%
  add_histogram() %>%
  layout(
    title = "The distribution of housing price among ocean proximity in LA"
  )
fig
```
### Word Cloud of an LA Housing and Electricity Article
```{r}
# Read the article, one element per line.
text <- readLines("LA_housing_overview.txt")

# Reduce every line to plain ASCII; characters that cannot be converted are
# removed (sub = ""). iconv() is already vectorised, so it is applied to the
# whole vector directly. Wrapping it in sapply() added nothing except naming
# every element with its full source line.
text <- iconv(text, "latin1", "ASCII", sub = "")

# One document per line, then the usual clean-up passes: lower-case, strip
# numbers and punctuation, remove stop words, collapse whitespace.
corpus <- VCorpus(VectorSource(text))
corpus <- tm_map(corpus, content_transformer(tolower))
corpus <- tm_map(corpus, removeNumbers)
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, removeWords, stopwords())
# Stemming is intentionally left disabled.
# corpus <- tm_map(corpus, stemDocument)
corpus <- tm_map(corpus, stripWhitespace)
```
```{r}
# Term counts per document, with terms above the 0.9999 sparsity threshold
# removed.
dtm <- DocumentTermMatrix(corpus)
dtm <- removeSparseTerms(dtm, 0.9999)

# Total frequency of every term across all documents, most frequent first.
dataset <- as.matrix(dtm)
v <- sort(colSums(dataset), decreasing = TRUE)
myNames <- names(v)
d <- data.frame(word = myNames, freq = v)

# Draw the cloud: terms seen at least 15 times, at most 200 of them, placed in
# decreasing order of frequency and coloured from the Dark2 palette.
pal2 <- brewer.pal(8, "Dark2")
wordcloud(
  words = d$word,
  freq = d$freq,
  scale = c(4, .6),
  min.freq = 15,
  max.words = 200,
  random.order = FALSE,
  random.color = FALSE,
  colors = pal2
)
```
### ABOUT
***
About